package com.zxk.crawler.task;

import com.zxk.crawler.downloader.UnirestDownLoader;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.BloomFilterDuplicateRemover;
import us.codecraft.webmagic.scheduler.QueueScheduler;

/**
 * WebMagic {@link PageProcessor} that fetches a novel chapter page and prints the
 * downloaded HTML to standard output.
 *
 * <p>The class is registered as a Spring {@link Component}. A crawl is started
 * explicitly by calling {@link #novelProcessor()}; nothing in this class schedules
 * it automatically.
 *
 * @author zxk
 * @date 2020/9/28
 */
@Component
public class NovelProcess implements PageProcessor {

    /** Seed URL handed to the spider when a crawl starts. */
    private static final String START_URL = "https://www.ibiquge.la/40/40401/18554870.html";

    /** Sizing argument passed to the Bloom-filter based URL de-duplicator. */
    private static final int BLOOM_FILTER_EXPECTED_INSERTIONS = 100000;

    /** Number of worker threads the spider is configured with. */
    private static final int SPIDER_THREADS = 5;

    /**
     * Crawl settings shared by every request issued for this processor:
     * UTF-8 charset, timeout 10000, up to 3 retries, retry sleep 3000
     * (WebMagic documents the two time values as milliseconds).
     */
    private final Site site = Site.me()
            .setCharset("utf-8")
            .setTimeOut(10000)
            .setRetryTimes(3)
            .setRetrySleepTime(3000);

    /**
     * Handles one downloaded page by writing its HTML to standard output.
     *
     * <p>No fields are extracted and no follow-up requests are added.
     *
     * @param page the page delivered by the spider
     */
    @Override
    public void process(Page page) {
        String html = page.getHtml().toString();
        System.out.println(html);
    }

    /**
     * Returns the crawl settings used for this processor.
     *
     * @return the {@link Site} configuration built when this instance was created
     */
    @Override
    public Site getSite() {
        return this.site;
    }

    /**
     * Builds a spider for {@link #START_URL} and runs it.
     *
     * <p>The spider uses this instance as its page processor (rather than creating a
     * second, non-Spring-managed {@code NovelProcess}), a fresh
     * {@link UnirestDownLoader} as downloader, and a {@link QueueScheduler} whose
     * duplicate remover is a {@link BloomFilterDuplicateRemover}.
     *
     * <p>NOTE(review): {@code Spider.run()} is invoked directly, so the crawl is
     * expected to execute on the calling thread until it finishes - confirm against
     * the WebMagic version in use before calling this from a latency-sensitive path.
     */
    // Scheduling is currently disabled:
    //    @Scheduled(initialDelay = 1000,fixedDelay = 10*1000)
    public void novelProcessor() {
        Downloader downloader = new UnirestDownLoader();
        Spider.create(this)
                .addUrl(START_URL)
                .setDownloader(downloader)
                .setScheduler(new QueueScheduler()
                        .setDuplicateRemover(
                                new BloomFilterDuplicateRemover(BLOOM_FILTER_EXPECTED_INSERTIONS)))
                .thread(SPIDER_THREADS)
                .run();
    }
}
